{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pprint\n",
    "pp = pprint.PrettyPrinter(indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training data x:\n",
      "[[ 0.38667861]\n",
      " [ 0.12720744]\n",
      " [ 0.33110419]],\n",
      " training data y:\n",
      "[[0]\n",
      " [1]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([array([[  1.36747614e-04,   4.49864923e-05,   1.17093901e-04],\n",
       "         [  1.53949069e-02,   5.06453343e-03,   1.31823120e-02],\n",
       "         [  7.02577183e-03,   2.31130051e-03,   6.01601016e-03],\n",
       "         [  4.04472163e-03,   1.33061070e-03,   3.46340403e-03]]),\n",
       "  array([[ 0.04027718,  0.04342117,  0.08546863,  0.10853641],\n",
       "         [-0.01213882, -0.01308636, -0.02575871, -0.03271092]])],\n",
       " [array([[ 0.00035365],\n",
       "         [ 0.03981318],\n",
       "         [ 0.01816954],\n",
       "         [ 0.01046016]]), array([[ 0.14671938],\n",
       "         [-0.04421858]])])"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 定义神经网络的模型架构 [input, hidden, output]\n",
    "network_sizes = [3,4,2]\n",
    "\n",
    "# 初始化该神经网络的参数\n",
    "sizes = network_sizes\n",
    "num_layers = len(sizes)\n",
    "biases = [np.random.randn(h, 1) for h in sizes[1:]]\n",
    "weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]\n",
    "\n",
    "def loss_der(network_y, real_y):\n",
    "    \"\"\"\n",
    "    返回损失函数的偏导，损失函数使用 MSE\n",
    "    L = 1/2(network_y-real_y)^2\n",
    "    delta_L = network_y-real_y\n",
    "    \"\"\"\n",
    "    return (network_y - real_y)\n",
    "\n",
    "\n",
    "def sigmoid(z):\n",
    "    \"\"\"激活函数使用 sigmoid.\"\"\"\n",
    "    return 1.0 / (1.0 + np.exp(-z))\n",
    "\n",
    "\n",
    "def sigmoid_der(z):\n",
    "    \"\"\"sigmoid函数的导数 derivative of sigmoid.\"\"\"\n",
    "    return sigmoid(z) * (1 - sigmoid(z))\n",
    "\n",
    "\n",
    "def backprop(x, y):\n",
    "    \"\"\"\n",
    "    根据损失函数 C通过反向传播算法返回\n",
    "    \"\"\"\n",
    "    \"\"\"Return a tuple \"(nabla_b, nabla_w)\" representing the\n",
    "    gradient for the cost function C_x.  \"nabla_b\" and\n",
    "    \"nabla_w\" are layer-by-layer lists of numpy arrays, similar\n",
    "    to \"self.biases\" and \"self.weights\".\"\"\"\n",
    "    \n",
    "    # 初始化网络参数的导数 权重w的偏导和偏置b的偏导\n",
    "    delta_w = [np.zeros(w.shape) for w in weights]\n",
    "    delta_b = [np.zeros(b.shape) for b in biases]\n",
    "    \n",
    "    # 向前传播 feed forward\n",
    "    activation = x     # 把输入的数据作为第一次激活值\n",
    "    activations = [x]  # 存储网络的激活值\n",
    "    zs = []            # 存储网络的加权输入值 (z=wx+b)\n",
    "    \n",
    "    for w, b in zip(weights, biases):\n",
    "        z = np.dot(w, activation) + b\n",
    "        activation = sigmoid(z)\n",
    "        \n",
    "        activations.append(activation)\n",
    "        zs.append(z)\n",
    "        \n",
    "    # 反向传播 back propagation \n",
    "    # BP1 计算输出层误差\n",
    "    delta_L = loss_der(activations[-1], y) * sigmoid_der(zs[-1])\n",
    "    # BP3 损失函数在输出层关于偏置的偏导\n",
    "    delta_b[-1] = delta_L\n",
    "    # BP4 损失函数在输出层关于权值的偏导\n",
    "    delta_w[-1] = np.dot(delta_L, activations[-2].transpose())\n",
    "\n",
    "    delta_l = delta_L\n",
    "    for l in range(2, num_layers):\n",
    "        # BP2 计算第l层误差\n",
    "        z = zs[-l] \n",
    "        sp = sigmoid_der(z)\n",
    "        delta_l = np.dot(weights[-l + 1].transpose(), delta_l) * sp\n",
    "        # BP3 损失函数在l层关于偏置的偏导\n",
    "        delta_b[-l] = delta_l\n",
    "        # BP4 损失函数在l层关于权值的偏导\n",
    "        delta_w[-l] = np.dot(delta_l, activations[-l - 1].transpose())\n",
    "\n",
    "    return (delta_w, delta_b)\n",
    "\n",
    "\n",
    "training_x = np.random.rand(3).reshape(3,1)\n",
    "training_y = np.array([0, 1]).reshape(2,1)\n",
    "print(\"training data x:\\n{},\\n training data y:\\n{}\".format(training_x, training_y))\n",
    "backprop(training_x, training_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
